library(ggplot2)
library(dplyr)
library(tidyr)
library(grDevices)

# ---- Input location ----
# Directory and file name of the raw follow-up survey export.
path_to_data <- "data"
raw_survey_file <- "follow_up_survey_rakuten.csv"

# ---- Output location ----
# The figure is written to <path_to_figures>/<appdx_plots>/<uni_comp_plot>.
path_to_figures <- "figures"
appdx_plots <- "appendix_plots"
uni_comp_plot <- "university_comparison_plot.eps"

# Load the survey responses; `encoding` marks the strings read as UTF-8.
followup_survey <- read.csv(
  file = file.path(path_to_data, raw_survey_file),
  encoding = "UTF-8"
)

# Coerce the three survey items to numeric. A factor column is converted
# through its labels first, because as.numeric() on a factor returns the
# internal level codes rather than the recorded values.
rating_items <- c("Q233_1", "Q233_2", "Q233_4")
followup_survey[rating_items] <- lapply(
  followup_survey[rating_items],
  function(item) {
    if (is.factor(item)) {
      item <- as.character(item)
    }
    # Entries that cannot be parsed as numbers become NA (with a warning).
    as.numeric(item)
  }
)

# Replace the item codes with descriptive column names.
followup_survey <- followup_survey %>%
  rename(
    `Elite University` = Q233_1,
    `Local University` = Q233_2,
    `Non-Local University` = Q233_4
  )

# Stack the three university columns into long format for plotting: the
# original column name goes to `key` and the cell value goes to `value`.
# pivot_longer() replaces the superseded gather(); the output column names
# are set explicitly so the plotting code can keep using `key` and `value`.
plot_format <- followup_survey %>%
  dplyr::select(all_of(c(
    "Elite University",
    "Local University",
    "Non-Local University"
  ))) %>%
  pivot_longer(
    cols = everything(),
    names_to = "key",
    values_to = "value"
  )

# Mean of each category, ignoring missing values. The result is not
# assigned, so it is only displayed where R auto-prints top-level values.
# vapply() enforces exactly one numeric value per column, unlike sapply(),
# whose return type depends on its input.
followup_survey %>%
  dplyr::select(all_of(c(
    "Elite University",
    "Local University",
    "Non-Local University"
  ))) %>%
  vapply(mean, numeric(1), na.rm = TRUE)


# Overlaid density curves of `value`, one semi-transparent fill per `key`;
# rows with a missing `value` are removed before plotting.
university_comparison_plot <- ggplot(
  data = filter(plot_format, !is.na(value)),
  mapping = aes(x = value, fill = key)
) +
  geom_density(alpha = 0.2) +
  theme_bw() +
  scale_x_continuous(name = "Evaluation of Academic Strength") +
  scale_fill_discrete(name = "University Type")


# Make sure the output directory exists before saving; without it the
# graphics device cannot open the target file. `showWarnings = FALSE`
# keeps this silent when the directory is already present.
dir.create(
  file.path(path_to_figures, appdx_plots),
  recursive = TRUE,
  showWarnings = FALSE
)

# Write the plot as a 7 x 7 inch file through the Cairo PostScript device.
# `fallback_resolution` is forwarded by ggsave() to cairo_ps().
ggsave(
  filename = file.path(path_to_figures, appdx_plots, uni_comp_plot),
  plot = university_comparison_plot,
  height = 7,
  width = 7,
  units = "in",
  device = cairo_ps,
  fallback_resolution = 200
)

